# encoding: utf-8
import re
import requests
# from fake_useragent import UserAgent
import json
from lxml import html
import io
import sys
import urllib3

# Silence the InsecureRequestWarning that urllib3 emits for HTTPS requests
# made with verify=False (both fetch functions in this file pass verify=False).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Replace sys.stdout with a wrapper that encodes all printed text as GB18030.
# NOTE(review): presumably done so Chinese text prints on a GBK/GB18030
# console without UnicodeEncodeError -- confirm the target terminal.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')
# Short alias for the etree API exposed through lxml.html.
etree = html.etree
# ua = UserAgent()
# The bare string below is a no-op section label; it reads "fetch page content".
"""获取页面内容"""
# Request headers sent by spider_1() to the Zhihu feed API.
#
# The Cookie value is one long header string.  It is written as adjacent
# string literals inside parentheses (joined by the compiler into a single
# str) because a single-quoted literal cannot contain a physical line break.
#
# NOTE(review): this is a captured logged-in session cookie hard-coded in the
# source.  It is a credential and will expire; consider loading it from the
# environment or a local, untracked file instead.
kv = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
    'Cookie': (
        '_zap=290554a9-028c-44dc-acdf-f18643c21ca1; '
        'd_c0="AOCQ4EoIJRSPTsO_k5IsVIPAdxZLbukq1qQ=|1638862585"; '
        '_9755xjdesxxd_=32; '
        'YD00517437729195:WM_TID=1Zx5CSZJqDRBVRBFARN6p4Ibt03QrHBT; '
        'capsion_ticket="2|1:0|10:1641291607|14:capsion_ticket|44:NmU3ZDFjOWY4NDQ5NGU4MWEzNDAzNGI5Y2MwOTdhMjg=|eff88152137c1c3af691c00064e31451466b33e368a55c7f2f66a991c8d797e1"; '
        '_xsrf=ff8c5e37-b6f7-4bae-a1cc-4176d5fe9d60; '
        'Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1642126245,1642140604,1642140625,1642214996; '
        'YD00517437729195:WM_NI=tFZ/NLz3nkGhZKO7Nky4gua3zVYdjQlGrJ/GlNRLmVQ0sN7BvSx9GW8lMkgPGD8HsPdCpvx0EPk9gy98csXobNWN/Nj04Kx/e5QG8MUE8sPCG08+jTPq7jHtPnfk2CbTdGY=; '
        'YD00517437729195:WM_NIKE=9ca17ae2e6ffcda170e2e6eed4d55491938787f549f5e78eb7d14e828f9eaff47f9c91848fc75bb5f0bcbbe62af0fea7c3b92ab89a96d0cd48baeda28cc6649a9581d8cd61bbeca996e439ba8a9785e95bb099beabae4db0929eadf05e93f0bdafb64ab68db8dac65dfcb4838ce65c859cf8a9f25ca89dc097fb3c8790a5dae673fbe8ad94db65a29299afae46aaf0fcacfb25a7eb9fa8bb47979bf894d34985f0f8cce96094aff98bcb8094b2aa8cb149a28caca8dc37e2a3; '
        'captcha_session_v2="2|1:0|10:1642231199|18:captcha_session_v2|88:cTRUajRaK2VGL2o2WE04Y3FoVlNFT015RHpXS2tMWlhXSGw4bmVSanZNM3NqbVIxdVFOYnBTbzRNMlBHYTNsbA==|1c05ef3d2f9c5a73c4f29c926e199bf924080f2fa34738c0799dd4b9ad91ee28"; '
        'SESSIONID=w5n4D2WQTpBgCKnxMw1spoccaPijgVmCHV86KHnY2ff; '
        'JOID=VV4SAkMYXTfmBeWYPx7DaDRH67Mrbmlf207WzmhDLHSHT6H3Rrgv2oEL5Z8wRqvW8ytAUMhnsjEKB4nlVi2b-0Q=; '
        'osd=W18WCk4WXDPuCOuZOxbOZjVD474lb21X1kDXymBOInWDR6z5R7wn148K4Zc9SKrS-yZOUcxvvz8LA4HoWCyf80k=; '
        '__snaker__id=tYYQUosODlFJXM8M; '
        'gdxidpyhxdE=+qW7zNYo87+nc72/ZBWi2prqXzVoasw+5w+9q9kHhOBxLAHHCeX266XhWENuXZ7gtojtjLSJPAPnknrRbQJOh6Q4uvgdOIVuG1RCKNTSIxMhhbg3gVpliakhWr\\xS20vQsTJrbyJ3guhCpR6VuyRO7vksbMSNkr6yXdU1BdDdtsuUTQd:1642232103156; '
        'l_n_c=1; '
        'o_act=login; '
        'ref_source="other_https://www.zhihu.com/signin?next=/"; '
        'r_cap_id="YzYyOGRjZjEzNjlhNGFjNWFkNWE2NTQzMmFhNTdmOTQ=|1642231204|7e807dae3b78602541668205fa1c565950762363"; '
        'cap_id="YTAyMmQ1YTgyYWY0NGY4Mjg5YjZiN2NhZDUwZTg3MDY=|1642231204|8028dc05e4445f6ad08d30b73a7c62df42b5b12a"; '
        'l_cap_id="NWE4OGJjYmUwOTkzNDJmY2I2OWY1ZjgyZDcxY2JiMWI=|1642231204|67a716b694d5384cb13df262c1d9c787068497d2"; '
        'n_c=1; '
        'captcha_ticket_v2="2|1:0|10:1642231227|17:captcha_ticket_v2|704:eyJ2YWxpZGF0ZSI6Ik5BTlBfUW5BYmllMU9ISUViUEJjZHhNcENMdEVDRXBleC03ZVBuLU1WTDdibGhvVE1CbWVZYTZ3SGhNUEFWSUJaVHJTbDhVUzFVeGU5UzJtei15ZS00R01HaGtaRmZFLk9RRUo3cF92YnZpcnBCa2dmT0NucGpodDRsdzAuUF92UTBWcGRxMURkRUVFSDR0VXl0TXVpUV82cGp6RkV0VEV4LWJZNzdoTU9EQUlGVDJvMkVQLm5nMnFzQTF0RUl5LnBjRGViZzZmejZXdktmamVMV0hTbEsteVZVUXdSdmtoenZKOFJJX0FZUlJyZ0MwdzQtLXh1MmFFNjYwNXQtYXN6LW41NzZhYmhnN3BhWW5GUl8tYU9CaldqWUdsLkFsRy1SRDV5djRLVXNhejBuNWNFcWxtamFxRjFkaFhUX2JuaklFZHRKdjZ5QmNPQUVEWUEwb3VaakpUTi14WkhLcWptb1Y5NFhMNnZBVUhGQ3hQYTRTLkZGQnR6VWZrV1VvOThJOUZlMlpPZDdvSG5QdXVEeE1RQ0Z3NG9CZ1JWeXVzMkJncUI0SkJ2bXhNVlRoclJiWDB5NHdWc29MWEtFdldTelVqeDZjOWg3NTc4ekJvbHhadGlibWtYcTVEb2I1YzBlRHRveVg1T0N6SWdoS1NBWTdOZ04tTDhBeldJczZBMyJ9|9f1565a3bbc043ed047ad1bbf32a1cb58645341149fa1083289515eedb8d3dbd"; '
        'z_c0="2|1:0|10:1642231286|4:z_c0|92:Mi4xUkRINEJ3QUFBQUFBNEpEZ1NnZ2xGQ1lBQUFCZ0FsVk45c1BQWWdDc256a2duNWRzaHgwR3lOb1hTUmpUYzUzV2hR|ef0dd681cbabdd73515f625256531eabeeefe474f8c18ca2714cba21e1bdbde3"; '
        'unlock_ticket="AHAsGqbrMQ0mAAAAYAJVTf584mFXpc7VIG6JtQrPmJvqJhFqDCJrDw=="; '
        'tst=r; '
        'Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1642231289; '
        'NOT_UNREGISTER_WAITING=1; '
        'KLBRSID=3d7feb8a094c905a519e532f6843365f|1642231305|1642231192'
    ),
}
# Cookie-less request headers used by szz1(): only a browser user-agent.
# To send a session cookie as well, add a 'cookie' entry with your own value.
kv1 = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/94.0.4606.61 Safari/537.36'
    ),
}
# Blocked keywords: spider_1() skips any answer whose question title contains
# one of these substrings.
k = [
    "原神",
    "开水烫蟑螂",
    "中国象棋",
    "王者荣耀",
    "基本款键盘",
    '大司马版本的"肌肉金轮"',
    "选购键盘",
    "明星颜值被吹上天",
    "iPhone",
    "李逍遥的这个皮肤是否值得入手",
    "python",
    "Python",
    "开头写一篇小说",
    "写一个故事",
    "为开头",
    "论文查重",
    "病娇",
    "打王者连王者",
    "明日方舟",
    "为开头写个故事",
    "摩尔庄园",
    "少年团",
    "利路修",
    "新鲜出炉的鳕鱼堡",
    "大司马因肌肉金轮的视频",
    "灵笼",
    "机械键盘",
    "耳屎",
    "追妻火葬场",
]
# Titles that spider_1() skipped because they matched a blocked keyword.
c = []
def spider_1(e):
    """Fetch one page of the Zhihu feed API and print the unblocked answers.

    Only feed items whose ``verb`` is ``TOPIC_ACKNOWLEDGED_ANSWER`` are
    considered.  If the question title of such an item contains any keyword
    from the module-level block list ``k``, the title is appended (once) to
    the module-level list ``c`` and the item is skipped.  Otherwise the title
    and the plain text extracted from the answer's HTML content are printed,
    framed by separator strings.  The accumulated ``c`` is printed at the end.

    Args:
        e: Full URL of the feed endpoint to request.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out (15 s), or the server answers with a 4xx/5xx status.
        json.JSONDecodeError: If the response body is not valid JSON.
        KeyError: If the payload lacks the ``data`` / ``verb`` / ``target``
            keys read below.
    """
    resp = requests.get(e, headers=kv, timeout=15, verify=False)
    # Surface HTTP errors (e.g. an expired cookie) as such, instead of as a
    # confusing JSON or KeyError failure further down.
    resp.raise_for_status()
    it = json.loads(resp.text)

    # The same literal is printed (repeated) before and after each answer body.
    sep = 'https://www.123wxwx.com/html/9/9621/77649206.html'

    for i in it['data']:
        if i['verb'] != 'TOPIC_ACKNOWLEDGED_ANSWER':
            continue
        target = i['target']
        title = target['question']['title']

        # Record a blocked title exactly once, even when several keywords
        # match it (e.g. overlapping keywords in the block list).
        if any(word in title for word in k):
            c.append(title)
            continue

        print(title + sep * 2)
        content = target['content']
        # Blank markup may give lxml no root element to return, so skip
        # parsing in that case rather than calling .xpath on None.
        soup = etree.HTML(content) if content and content.strip() else None
        print(''.join(soup.xpath('//text()')) if soup is not None else '')
        print(sep * 4)
    print(c)


def szz1():
    """Fetch https://www.123wxwx.com/html/9/9621 and print what is scraped.

    The response body is parsed with lxml.  The link texts and the hrefs
    found under ``<span class="last">`` are concatenated into one list and
    passed as the prompt of a blocking ``input()`` call, so the function
    shows them and waits for Enter.  Afterwards every text node that is a
    direct child of ``<div id="content">`` is printed behind a fixed URL
    prefix.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the 15 s timeout.
    """
    # Same timeout as spider_1(); without one, requests can wait forever on
    # a stalled connection.
    r = requests.get('https://www.123wxwx.com/html/9/9621', headers=kv1, timeout=15, verify=False).content
    sp = etree.HTML(r)
    li = sp.xpath('//span[@class="last"]/a/text()')
    zz = sp.xpath('//span[@class="last"]/a/@href')
    # Debug pause: the combined list is shown as the input() prompt.
    input(li + zz)
    for i in sp.xpath('//div[@id="content"]/text()'):
        print('https://www.123wxwx.com/html/9/9621/77605968' + i)


if __name__ == '__main__':
    # Script entry point: scrape a single hard-coded page of the Zhihu
    # recommendation feed.  The URL is written as adjacent literals, which
    # Python joins into one string.
    spider_1(
        'https://www.zhihu.com/api/v3/feed/topstory/recommend'
        '?session_token=8a6cb7553fec62ca3467d600605a7600'
        '&desktop=true&page_number=4&limit=6&action=down&after_id=17&ad_interval=-1'
    )
    # szz1() is the other fetcher in this file; it is not run by default.
